package com.zq.it.tool;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for reducing an HTML fragment to plain text.
 *
 * <p>All patterns are compiled once and matched case-insensitively, so
 * {@code <SCRIPT>}, {@code <Style>} and {@code <BR>} are handled like their
 * lower-case forms.
 */
public class Html2Text
{
  /** A complete {@code <script>...</script>} element, including its body. */
  private static final Pattern SCRIPT_ELEMENT = Pattern.compile(
      "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
      Pattern.CASE_INSENSITIVE);

  /** A complete {@code <style>...</style>} element, including its body. */
  private static final Pattern STYLE_ELEMENT = Pattern.compile(
      "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
      Pattern.CASE_INSENSITIVE);

  /** A line-break tag in any spelling: {@code <br>}, {@code <br/>}, {@code <BR />}, ... */
  private static final Pattern LINE_BREAK_TAG = Pattern.compile(
      "<\\s*br\\b[^>]*>",
      Pattern.CASE_INSENSITIVE);

  /** Any remaining tag. */
  private static final Pattern ANY_TAG = Pattern.compile(
      "<[^>]+>",
      Pattern.CASE_INSENSITIVE);

  /**
   * Converts an HTML fragment to plain text.
   *
   * <p>Processing order:
   * <ol>
   *   <li>{@code <script>} and {@code <style>} elements are removed together with their content;</li>
   *   <li>{@code <br>} tags become a newline character;</li>
   *   <li>every other tag is removed, keeping the text between tags;</li>
   *   <li>the entities {@code &gt;}, {@code &lt;}, {@code &nbsp;}, {@code &quot;},
   *       {@code &#39;} and {@code &amp;} are decoded.</li>
   * </ol>
   *
   * @param inputString the HTML to convert; may be {@code null}
   * @return the plain-text rendering, or an empty string when {@code inputString} is {@code null}
   */
  public static String Html2TextFormate(String inputString)
  {
    if (inputString == null) {
      return "";
    }

    String text = SCRIPT_ELEMENT.matcher(inputString).replaceAll("");
    text = STYLE_ELEMENT.matcher(text).replaceAll("");
    // Line breaks must be translated BEFORE the generic tag strip below,
    // otherwise the <br> tags are deleted and the breaks are lost.
    text = LINE_BREAK_TAG.matcher(text).replaceAll("\n");
    text = ANY_TAG.matcher(text).replaceAll("");

    text = text.replace("&gt;", ">");
    text = text.replace("&lt;", "<");
    text = text.replace("&nbsp;", " ");
    text = text.replace("&quot;", "\"");
    text = text.replace("&#39;", "'");
    // Kept for backward compatibility: a "<br/> " that only appears once
    // escaped markup (&lt;br/&gt; ) has been decoded is still turned into a newline.
    text = text.replace("<br/> ", "\n");
    // Decode &amp; last so that "&amp;lt;" yields the literal text "&lt;"
    // instead of being decoded a second time into "<".
    text = text.replace("&amp;", "&");

    return text;
  }
}